library("tidyverse")
## ── Attaching packages ─────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 2.2.1 ✔ purrr 0.2.5
## ✔ tibble 1.4.2 ✔ dplyr 0.7.5
## ✔ tidyr 0.8.1 ✔ stringr 1.3.1
## ✔ readr 1.1.1 ✔ forcats 0.3.0
## ── Conflicts ────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(ggpubr)
## Loading required package: magrittr
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
##
## set_names
## The following object is masked from 'package:tidyr':
##
## extract
library(ggrepel)
source(file.path(".","..","R","ASE_functions.R"))
source(file.path(".","..","R","PerformDiffAIAnalysisFor2Conditions.R"))
removeX <- function(DF, legitim_chrgenes){
return(DF[DF$ensembl_gene_id %in% legitim_chrgenes$gene, ])
}
chrgenes <- read.delim('../../../data/Mus_musculus.GRCm38.68.chrgenes.txt', col.names = c('chr', 'gene'))
inTabs <- paste0("../../../data/kidney/full/",
c("NEB", "SMARTseq10ng", "SMARTseq100pg"),
"_processed_gene_extended2.txt")
inDF18 <- removeX(GetGatkPipelineTabs(inTabs, c(6,6,6)), chrgenes)
## Parsed with column specification:
## cols(
## .default = col_character(),
## ref_rep1 = col_integer(),
## alt_rep1 = col_integer(),
## ref_rep2 = col_integer(),
## alt_rep2 = col_integer(),
## ref_rep3 = col_integer(),
## alt_rep3 = col_integer(),
## ref_rep4 = col_integer(),
## alt_rep4 = col_integer(),
## ref_rep5 = col_integer(),
## alt_rep5 = col_integer(),
## ref_rep6 = col_integer(),
## alt_rep6 = col_integer()
## )
## See spec(...) for full column specifications.
## Parsed with column specification:
## cols(
## .default = col_character(),
## ref_rep1 = col_integer(),
## alt_rep1 = col_integer(),
## ref_rep2 = col_integer(),
## alt_rep2 = col_integer(),
## ref_rep3 = col_integer(),
## alt_rep3 = col_integer(),
## ref_rep4 = col_integer(),
## alt_rep4 = col_integer(),
## ref_rep5 = col_integer(),
## alt_rep5 = col_integer(),
## ref_rep6 = col_integer(),
## alt_rep6 = col_integer()
## )
## See spec(...) for full column specifications.
## Parsed with column specification:
## cols(
## .default = col_character(),
## ref_rep1 = col_integer(),
## alt_rep1 = col_integer(),
## ref_rep2 = col_integer(),
## alt_rep2 = col_integer(),
## ref_rep3 = col_integer(),
## alt_rep3 = col_integer(),
## ref_rep4 = col_integer(),
## alt_rep4 = col_integer(),
## ref_rep5 = col_integer(),
## alt_rep5 = col_integer(),
## ref_rep6 = col_integer(),
## alt_rep6 = col_integer()
## )
## See spec(...) for full column specifications.
inDF18
Ph <- c(0.68, 0.95, 0.98)
RESULT18_Q_humanread <- lapply(Ph, function(p){
PerformDiffAIAnalysisFor2Conditions(inDF18, vect1CondReps=3:4, vect2CondReps=7:8, Q=p, fullOUT=T, BF=F)
})
DF18_covai <- RESULT18_Q_humanread[[1]]$deltaAIPairwise
DF18_covai <- DF18_covai[DF18_covai$group=="Condition2 01 vs 02", c("deltaAI", "MeanCov", "AI1")]
DF18_hobq <- do.call(rbind, lapply(1:length(RESULT18_Q_humanread), function(i){
df = RESULT18_Q_humanread[[i]]$observedQuartiles
df = df[df$condition=="Condition2" & df$ij=="01 vs 02", ]
df[df$binNObservations >= 20, c("coverageBin", "deltaAI", "Q")]
}))
gg_covai <- ggplot() +
geom_point(data=DF18_covai, aes(MeanCov, deltaAI), size=0.5, color="gray") +
geom_line(data=DF18_hobq[DF18_hobq$coverageBin>=10, ], aes(coverageBin, deltaAI, color=Q)) +
geom_point(data=DF18_hobq[DF18_hobq$coverageBin>=10, ], aes(coverageBin, deltaAI, color=Q), size=0.5) +
labs(x = "Mean Gene Coverage", y = "Allelic Imbalance difference") +
ggtitle("") +
theme_bw() +
theme(legend.position = c(0.8, 0.8), text = element_text(size=12)) +
xlim(0, 3500)
cowplot::save_plot(gg_covai, file="Fig_tree_with_Qs.pdf", base_height = 4)
## Warning: Removed 770 rows containing missing values (geom_point).
gg_covai_lin <- ggplot(data=DF18_hobq[DF18_hobq$coverageBin>=10, ], aes(x=coverageBin, y=deltaAI, group=Q, color=Q)) +
geom_point(size=0.5) +
theme_bw() +
ggtitle("") +
xlab('Mean Gene Coverage') + ylab('Quantile(AI)') +
theme(legend.position=c(0.8, 0.8), text = element_text(size=12)) +
scale_x_continuous(trans='log2') +
scale_y_continuous(trans='log2') +
geom_smooth(method='lm', formula = y ~ offset(-0.5*x), aes(group=Q)) +
theme(panel.background = element_rect(fill = 'white', colour = 'white'),
legend.key = element_rect(fill = F, colour = "white")) +
guides(color=guide_legend(override.aes=list(fill=NA)))
cowplot::save_plot(gg_covai_lin, file="Fig_parallel_lines.pdf", base_height = 4)
HQint <- sapply(1:length(Ph), function(i){RESULT18_Q_humanread[[i]]$intercepts$Condition2}$linInt)
HQint_Zscore <- rbind(data.frame(quantile = Ph,
observation = HQint/HQint[2],
x = "Correction constant"),
data.frame(quantile = Ph,
observation = qnorm(Ph + (1-Ph)/2) / qnorm(0.84),
x = "Binomial Z-score"))
HQint_Zscore$x <- factor(HQint_Zscore$x)
gg_qcoeff <- ggplot(HQint_Zscore, aes(x, observation, color=x)) +
geom_point(aes(color=x)) +
geom_label_repel(aes(label = quantile),
box.padding = 0.35,
point.padding = 0.5,
segment.color = 'grey50',
direction = "y",
hjust = 0) +
labs(x = "Type", y = "sd proportion") +
ggtitle("") +
theme_bw() +
theme(legend.position = c(0.5, 0.9), text = element_text(size=12), legend.title = element_blank())
Plot with coefficients:
# load the data
method_lib <- c("NEB 100ng", "SMARTseq 10 ng", "SMARTseq 100 pg")
INTC18_90_humanread <- do.call(rbind, lapply(0:2, function(i){
C = (1:6)+i*6
data.frame(PerformDiffAIAnalysisFor2Conditions(inDF18, vect1CondReps=3:4,
vect2CondReps=C, Q=0.90, fullOUT=T, BF=F)$intercepts$Condition2[, c("ij","linInt")],
method = method_lib[i+1])
}))
INTC18_90_humanread$method <- factor(INTC18_90_humanread$method)
# plot with jitter
gg_mcoeff <- ggplot(INTC18_90_humanread, aes(method, linInt, color=method)) +
geom_jitter(aes(color=method)) +
labs(x = "Lib Preparation Method", y = "Observed correction constant") +
ggtitle("") +
theme_bw() +
theme(legend.position = c(0.3, 0.8), text = element_text(size=12))
cowplot::save_plot(gg_mcoeff, file="Fig_coeff.pdf", base_height = 4)